rm(list=ls());gc()
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 534959 28.6    1193748 63.8         NA   669411 35.8
## Vcells 991680  7.6    8388608 64.0      16384  1851784 14.2
library(MCMCglmm)
## Loading required package: Matrix
## Loading required package: coda
## Loading required package: ape
## Warning: package 'ape' was built under R version 4.3.3

First, we need a relatedness matrix We created two phenotypic data set to discuss the role of envionmental and genetic (co)variances in G-matrices estimations

NB: in this tutorial we won’t ensure the convergence of the MCMC models or investigate autocorrelation in the posterior samples for simplicity

This tutorial has two parts

Beginning of part 1

kin.inv=as(as.matrix(read.table('data/kinship.inverted.txt')),"dgCMatrix")
#dim(kin.inv) # 300
kin.inv[1:5,1:5]
## 5 x 5 sparse Matrix of class "dgCMatrix"
##            ind1       ind2        ind3        ind4       ind5
## ind1 6.39710602  0.2796442  0.09781572  0.02587626  0.1746951
## ind2 0.27964425  6.8068225 -0.41487478  0.16668272  0.1517489
## ind3 0.09781572 -0.4148748  6.58310344  0.19790564 -0.1249038
## ind4 0.02587626  0.1666827  0.19790564  6.84654734 -0.3577153
## ind5 0.17469514  0.1517489 -0.12490376 -0.35771526  7.0852689

We will estimate the genetic variance for each trait separately and then in a single multivariate model

## Load the first data set

phen1 <- read.table(file="data/Pop1_phenotypes.txt",sep='\t',h=TRUE)
phen2 <- read.table(file="data/Pop2_phenotypes.txt",sep='\t',h=TRUE)
phen=phen1
#Rename the individuals to match the kinship matrix
phen$ind=row.names(kin.inv)
## Plot and investigate the phenotype distibution as you want

## Produce here your favorite plot
plot(phen$height,phen$weight,pch=16)

#cor(phen$height,phen$weight) # 0.6519626

We can see that the traits are highly correlated, but is it genetics or environment?

Run a first model on height

nb_trait=1
p.var = var(phen$height)
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height"))))
prior1.1 = list(G=list(G1=list(V=matrix(phen.var/2),nu=1)),R=list(V=matrix(phen.var/2),nu=1))
#model1.1 = MCMCglmm(height~1,random=~ind,ginv=list(ind = kin.inv),data=phen,prior=prior1.1,nitt= 53000, thin= 500, burnin= 3000)
#save('model1.1',file='RData/model1.1.RData')
load('RData/model1.1.RData')

# Convergence checks from Wednesday's workshop
heidel.diag(model1.1[["Sol"]]);heidel.diag(model1.1[["VCV"]])
##                                           
##             Stationarity start     p-value
##             test         iteration        
## (Intercept) passed       1         0.818  
##                                        
##             Halfwidth Mean    Halfwidth
##             test                       
## (Intercept) failed    -0.0134 0.0109
##                                     
##       Stationarity start     p-value
##       test         iteration        
## ind   passed       1         0.691  
## units passed       1         0.454  
##                                 
##       Halfwidth Mean   Halfwidth
##       test                      
## ind   passed    0.0942 0.00749  
## units passed    0.9395 0.01522
autocorr.diag(model1.1[["Sol"]]);effectiveSize(model1.1[["Sol"]])
##           (Intercept)
## Lag 0      1.00000000
## Lag 500   -0.03122956
## Lag 2500  -0.19051614
## Lag 5000  -0.04781202
## Lag 25000 -0.08813440
## (Intercept) 
##         100
## Extract the genetic/residual variance estimates for the trait
plot(model1.1$VCV)

# Estimate genetic variance
apply(model1.1$VCV,2,median)
##        ind      units 
## 0.08765121 0.93442541
# Credible interval for the trait
HPDinterval(model1.1$VCV)
##            lower     upper
## ind   0.04373865 0.1878074
## units 0.81512265 1.1066037
## attr(,"Probability")
## [1] 0.95

Run a second model on weight

nb_trait=1
p.var<-var(phen$weight)
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("weight"))))
prior1.2<-list(G=list(G1=list(V=matrix(phen.var/2),nu=1)),R=list(V=matrix(phen.var/2),nu=1))
#model1.2<-MCMCglmm(weight~1,random=~ind,ginv=list(ind = kin.inv),data=phen,prior=prior1.2,nitt= 53000, thin= 500, burnin= 3000)
#save('model1.2',file='RData/model1.2.RData')
load('RData/model1.2.RData')

plot(model1.2$VCV)

apply(model1.2$VCV,2,median)
##       ind     units 
## 0.5801678 0.3939160
HPDinterval(model1.2$VCV)
##           lower     upper
## ind   0.4047552 0.8179593
## units 0.3196955 0.4844361
## attr(,"Probability")
## [1] 0.95

We see that two traits are highly phenotypically correlated, but models show one has very low heritability and one very high. We can run a multivariate model to estimate genetic (co)-variances between traits. So the environmental variance indeed is the reason behind phenotypic correlation.

Run a final model - multivariate

nb_trait=2
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height","weight"))))
prior1.3= list(G=list(G1=list(V=(phen.var/2),nu=2)),R=list(V=(phen.var/2),nu=2))
#model1.3<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,ginv=list(ind = kin.inv),data=phen,prior=prior1.3,
#                   family = rep("gaussian", nb_trait),nitt= 53000, thin= 500, burnin= 3000)
#save('model1.3',file='RData/model1.3.RData')
load('RData/model1.3.RData')

### Here we can extract a G-matrix of genetic (co)-variances
Gmat1 <- matrix(apply(model1.3$VCV[,1:4],2,median),nrow=2) # genetic co-variance matrix
#           [,1]       [,2]
#[1,] 0.10632641 0.08428513
#[2,] 0.08428513 0.45975364
matrix(apply(model1.3$VCV[,5:8],2,median),nrow=2) # phenotypic co-variance matrix (genetic + environment)
##           [,1]      [,2]
## [1,] 0.9371575 0.5748941
## [2,] 0.5748941 0.3970679
#           [,1]      [,2]
#[1,] 0.9371575 0.5748941
#[2,] 0.5748941 0.3970679
HPDinterval(model1.1$VCV)
##            lower     upper
## ind   0.04373865 0.1878074
## units 0.81512265 1.1066037
## attr(,"Probability")
## [1] 0.95
HPDinterval(model1.3$VCV)
##                                    lower     upper
## traitheight:traitheight.ind   0.06500519 0.1852113
## traitweight:traitheight.ind   0.01096517 0.1803158
## traitheight:traitweight.ind   0.01096517 0.1803158
## traitweight:traitweight.ind   0.33006258 0.5851530
## traitheight:traitheight.units 0.78990189 1.0881127
## traitweight:traitheight.units 0.49030694 0.6815341
## traitheight:traitweight.units 0.49030694 0.6815341
## traitweight:traitweight.units 0.33872217 0.4718267
## attr(,"Probability")
## [1] 0.95

Now we can compare the genetic/environmental variance estimated from both univariate and bivariate models and conclude on the source of phenotypic variance

Gmat shows genetic variance of trait1 (G11), trait2 (G12), and their co-varianc (G12) Similar for the environmental matrix, in the cor.test of 2 traits, we get R^2 = 0.66, this 0.66 is due to 0.57 environment and 0.08 only from genetics So the phenotypic correlation we got are actually environmental

Run similarly for Second population

phen = phen2
phen$ind=rownames(kin.inv)

### Explore the phenotypes as for population 1
plot(phen$height,phen$weight,pch=16)

#cor(phen$height,phen$weight) # 0.1711583

First model

nb_trait=1
p.var<-var(phen$height)
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height"))))
prior1.1<-list(G=list(G1=list(V=matrix(phen.var/2),nu=1)),R=list(V=matrix(phen.var/2),nu=1))
#model1.1_pop2<-MCMCglmm(height~1,random=~ind,ginv=list(ind = kin.inv),data=phen,prior=prior1.1,nitt= 53000, thin= 500, burnin= 3000)
#save('model1.1_pop2',file='RData/model1.1_pop2.RData')
load('RData/model1.1_pop2.RData')
plot(model1.1_pop2$VCV)

mean(model1.1_pop2$VCV)
## [1] 0.4934249
HPDinterval(model1.1_pop2$VCV)
##           lower     upper
## ind   0.3557060 0.8364216
## units 0.3129707 0.4819893
## attr(,"Probability")
## [1] 0.95

Second model

nb_trait=1
p.var<-var(phen$weight)
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("weight"))))
prior1.2<-list(G=list(G1=list(V=matrix(phen.var/2),nu=1)),R=list(V=matrix(phen.var/2),nu=1))
#model1.2_pop2<-MCMCglmm(weight~1,random=~ind,ginv=list(ind = kin.inv),data=phen,prior=prior1.2)
#save('model1.2_pop2',file='RData/model1.2_pop2.RData')
load('RData/model1.2_pop2.RData')

plot(model1.2_pop2$VCV)

mean(model1.2_pop2$VCV)
## [1] 1.189961
HPDinterval(model1.2_pop2$VCV)
##           lower    upper
## ind   0.5997141 1.722828
## units 1.0249987 1.542113
## attr(,"Probability")
## [1] 0.95

Bivariate model

nb_trait=2
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height","weight"))))
prior1.3<- list(G=list(G1=list(V=(phen.var/2),nu=2)),R=list(V=(phen.var/2),nu=2))
#model1.3_pop2<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,ginv=list(ind = kin.inv),data=phen,prior=prior1.3,
#                   family = rep("gaussian", nb_trait))
#save('model1.3_pop2',file='RData/model1.3_pop2.RData')
load('RData/model1.3_pop2.RData')

# Construct G/E matrices 
#plot(model1.3_pop2$VCV)
Gmat2 <- matrix(apply(model1.3_pop2$VCV[,1:4],2,median),nrow=2)
#            [,1]       [,2]
#[1,]  0.3333886 -0.1420900
#[2,] -0.1420900  0.6488772
matrix(apply(model1.3_pop2$VCV[,5:8],2,median),nrow=2)
##           [,1]      [,2]
## [1,] 0.4434704 0.7345557
## [2,] 0.7345557 1.3751386
#          [,1]      [,2]
#[1,] 0.4434704 0.7345557
#[2,] 0.7345557 1.3751386

HPDinterval(model1.1_pop2$VCV)
##           lower     upper
## ind   0.3557060 0.8364216
## units 0.3129707 0.4819893
## attr(,"Probability")
## [1] 0.95
HPDinterval(model1.3_pop2$VCV)
##                                    lower      upper
## traitheight:traitheight.ind    0.2351921 0.46385776
## traitweight:traitheight.ind   -0.2611148 0.02343029
## traitheight:traitweight.ind   -0.2611148 0.02343029
## traitweight:traitweight.ind    0.3882450 0.96417018
## traitheight:traitheight.units  0.3665088 0.53121112
## traitweight:traitheight.units  0.6061634 0.87863519
## traitheight:traitweight.units  0.6061634 0.87863519
## traitweight:traitweight.units  1.1382431 1.68177154
## attr(,"Probability")
## [1] 0.95

We will create a null distribution forG matrices and discuss significance of the observed from the random

Generate a null distribution for the covariance The issue with MCMCglmm is that variance estimates are positive definite

## Randomize the individual labels
#phen.rdm <- phen
#all.post.modes <- NULL
#for(i in 1:100){
#phen.rdm$ind = sample(phen.rdm$ind)
#model1.3_pop2.rdm<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,ginv=list(ind = kin.inv),data=phen.rdm,prior=prior1.3,
#                   family = rep("gaussian", nb_trait))
#all.post.modes=rbind(all.post.modes,posterior.mode(model1.3_pop2.rdm$VCV))
#}
#save("all.post.modes",file='Rdm.model1.3_pop2.RData')

load("Rdata/Rdm.model1.3_pop2.RData")
# Get the credible interval from the randomized G matrices
apply(all.post.modes,2,function(x){HPDinterval(as.mcmc(x))})
##      traitheight:traitheight.ind traitweight:traitheight.ind
## [1,]                  0.08622752                 -0.02542175
## [2,]                  0.15828279                  0.04040328
##      traitheight:traitweight.ind traitweight:traitweight.ind
## [1,]                 -0.02542175                   0.1750355
## [2,]                  0.04040328                   0.3195729
##      traitheight:traitheight.units traitweight:traitheight.units
## [1,]                     0.8652245                     0.1739903
## [2,]                     1.0038076                     0.3098415
##      traitheight:traitweight.units traitweight:traitweight.units
## [1,]                     0.1739903                      2.025782
## [2,]                     0.3098415                      2.279589
#traitheight:traitheight.ind traitweight:traitheight.ind
#[1,]                  0.08622752                 -0.02542175
#[2,]                  0.15828279                  0.04040328
#traitheight:traitweight.ind traitweight:traitweight.ind
#[1,]                 -0.02542175                   0.1750355
#[2,]                  0.04040328                   0.3195729
#traitheight:traitheight.units traitweight:traitheight.units
#[1,]                     0.8652245                     0.1739903
#[2,]                     1.0038076                     0.3098415
#traitheight:traitweight.units traitweight:traitweight.units
#[1,]                     0.1739903                      2.025782
#[2,]                     0.3098415                      2.279589

Plot the intervals with the median of the estimated G-matrix and conclude

library(ggplot2)

# Observed G-matrix values
Gmat_observed <- c(0.3334, 0.6489, -0.1421)

# HPD intervals from randomized G-matrices
HPD_lower <- c(0.0862, 0.1750, -0.0254)
HPD_upper <- c(0.1583, 0.3196, 0.0404)

# Labels for each element of G-matrix
labels <- c("Var(height)", "Var(weight)", "Cov(height, weight)")

# Create dataframe for plotting
df <- data.frame(
  Trait = labels,
  Observed = Gmat_observed,
  HPD_lower = HPD_lower,
  HPD_upper = HPD_upper
)

# Plot the HPD intervals with observed values
  # intervals show the intervals of HPD under random scenario (null distribution)
  # the dots show the data point we observed
library(ggplot2)
ggplot(df, aes(x = Trait, y = Observed)) +
  geom_point(size = 4, color = "salmon") +  # Observed G-matrix values
  geom_errorbar(aes(ymin = HPD_lower, ymax = HPD_upper), width = 0.2, color = "skyblue") +
  theme_minimal() +
  labs(y = "Genetic Variance/Covariance", x = "Matrix Element",
       title = "Observed G-matrix vs. Null Distribution HPD Intervals") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black")

We will then investigate how populations with different G matrices respond to selection NB: here we assume fix G-matrices that does not evolve only the mean population phenotype respond to selection

We can generate evolutionary path from the G-matrices The breeding value should align with the G matrix

## Plot the G-matrix as an ellipse
source("Rcode/99_Ellipse_function.R")
#plot(ellipse_coord(cor(phen1[,2:3])),xlim=c(-2,2),ylim=c(-2,2),type="l",col='skyblue') # P matrix
#lines(ellipse_coord(Gmat1)) # G matrix
#points(phen1$height/3,phen1$weight/3,pch=16,cex=.3)

library(ggplot2)

# Compute ellipse coordinates for P-matrix (phenotypic correlation) and G-matrix (genetic correlation)
P_ellipse <- as.data.frame(ellipse_coord(cor(phen1[, 2:3])))  # P-matrix
G_ellipse <- as.data.frame(ellipse_coord(Gmat1))  # G-matrix

# Rename columns for ggplot
colnames(P_ellipse) <- c("x", "y")
colnames(G_ellipse) <- c("x", "y")

# Scale the observed trait points for visualization
phen1_scaled <- data.frame(
  x = phen1$height / 3,
  y = phen1$weight / 3
)

# Plot the ellipses with ggplot
ggplot() +
  geom_path(data = P_ellipse, aes(x = x, y = y), color = "skyblue", size = 1) +  # P-matrix ellipse
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "chartreuse4", size = 1) +  # G-matrix ellipse
  geom_point(data = phen1_scaled, aes(x = x, y = y), color = "grey", alpha = 0.3, size = 0.5) +  # Data points
  theme_minimal() +
  labs(title = "Comparison of G-matrix and P-matrix as Ellipses",
       x = "Trait 1 (Height)",
       y = "Trait 2 (Weight)") +
  coord_fixed()  # Ensure equal axis scaling
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Add more G matrix as needed

#create your own G matrix
Gmat3 = matrix(c(1,-0.3,-0.3,0.5),nrow=2)

G_ellipse2 = as.data.frame(ellipse_coord(Gmat3))  # G-matrix
colnames(G_ellipse2) = c("x", "y")

ggplot() +
  geom_path(data = P_ellipse, aes(x = x, y = y), color = "skyblue", size = 1) +  # P-matrix ellipse
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "chartreuse4", size = 1) +  # G-matrix ellipse
  geom_path(data = G_ellipse2, aes(x = x, y = y), color = "salmon", size = 1) +  #  second G-matrix ellipse
  geom_point(data = phen1_scaled, aes(x = x, y = y), color = "grey", alpha = 0.3, size = 0.5) +  # Data points
  theme_minimal() +
  labs(title = "Comparison of G-matrix and P-matrix as Ellipses",
       x = "Trait 1 (Height)",
       y = "Trait 2 (Weight)") +
  coord_fixed()  # Ensure equal axis scaling

We apply a vector of selection on 1 trait (directional selection)

beta1 = c(3,0) # the vector to "move" the matrix

#plot(ellipse_coord(Gmat1),xlim=c(-2,2),ylim=c(-2,2),type="l")
#new_mean_pop1 <- Gmat1%*%beta1
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta1
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta1
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))


library(ggplot2)

# Generate ellipse coordinates for the G-matrix
G_ellipse = as.data.frame(ellipse_coord(Gmat1))
colnames(G_ellipse) <- c("x", "y")

# Compute mean shifts
new_mean_pop1 = Gmat1 %*% beta1 # move the matrix along the vector
mean_shifts = list()

for (i in 1:3) {
  shifted_ellipse = as.data.frame(t(matrix(rep(new_mean_pop1, 1000), nrow=2)) + ellipse_coord(Gmat1))
  colnames(shifted_ellipse) = c("x", "y")
  mean_shifts[[i]] = shifted_ellipse
  new_mean_pop1 = new_mean_pop1 + Gmat1 %*% beta1  # Update mean for next iteration
}

# Combine all shifts into a single dataframe
mean_shifts_df = do.call(rbind, Map(cbind, mean_shifts, shift_group = as.factor(1:3)))

# Plot using ggplot
ggplot() +
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_path(data = mean_shifts_df, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("salmon", "skyblue", "chartreuse4")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Genetic Covariance Ellipse with Mean Shifts",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed()  # Keep aspect ratio fixed

The same with the second G-matrix

#plot(ellipse_coord(Gmat2),xlim=c(-2,2),ylim=c(-2,2),type="l")
#new_mean_pop2 <- Gmat2%*%beta1
#lines(t(matrix(rep(new_mean_pop2,1000),nrow=2))+ellipse_coord(Gmat2))
#new_mean_pop2 <- new_mean_pop2 + Gmat2%*%beta1
#lines(t(matrix(rep(new_mean_pop2,1000),nrow=2))+ellipse_coord(Gmat2))
#new_mean_pop2 <- new_mean_pop2 + Gmat2%*%beta1
#lines(t(matrix(rep(new_mean_pop2,1000),nrow=2))+ellipse_coord(Gmat2))

library(ggplot2)
# Generate ellipse coordinates for the G-matrix
G_ellipse2 = as.data.frame(ellipse_coord(Gmat2))
colnames(G_ellipse2) <- c("x", "y")

# Compute mean shifts
new_mean_pop2 = Gmat2 %*% beta1 # move the matrix along the vector
mean_shifts = list()

for (i in 1:3) {
  shifted_ellipse = as.data.frame(t(matrix(rep(new_mean_pop2, 1000), nrow=2)) + ellipse_coord(Gmat2))
  colnames(shifted_ellipse) = c("x", "y")
  mean_shifts[[i]] = shifted_ellipse
  new_mean_pop2 = new_mean_pop2 + Gmat2 %*% beta1  # Update mean for next iteration
}

# Combine all shifts into a single dataframe
mean_shifts_df = do.call(rbind, Map(cbind, mean_shifts, shift_group = as.factor(1:3)))

# Plot using ggplot
ggplot() +
  geom_path(data = G_ellipse2, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_path(data = mean_shifts_df, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("salmon", "skyblue", "chartreuse4")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Genetic Covariance Ellipse with Mean Shifts",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed()  # Keep aspect ratio fixed

We can generate any G-matrix and simulate the population response to selection

Gmat3 = matrix(c(1,-0.3,-0.3,0.5),nrow=2)
#plot(ellipse_coord(Gmat3),xlim=c(-5,5),ylim=c(-5,5),type="l")
#new_mean_pop3 <- Gmat3%*%beta1
#lines(t(matrix(rep(new_mean_pop3,1000),nrow=2))+ellipse_coord(Gmat3))
#new_mean_pop3 <- new_mean_pop3 + Gmat3%*%beta1
#lines(t(matrix(rep(new_mean_pop3,1000),nrow=2))+ellipse_coord(Gmat3))

# Generate ellipse coordinates for the G-matrix
G_ellipse3 = as.data.frame(ellipse_coord(Gmat3))
colnames(G_ellipse3) <- c("x", "y")

# Compute mean shifts
new_mean_pop3 = Gmat3 %*% beta1 # move the matrix along the vector
mean_shifts = list()

for (i in 1:3) {
  shifted_ellipse = as.data.frame(t(matrix(rep(new_mean_pop3, 1000), nrow=2)) + ellipse_coord(Gmat3))
  colnames(shifted_ellipse) = c("x", "y")
  mean_shifts[[i]] = shifted_ellipse
  new_mean_pop3 = new_mean_pop3 + Gmat3 %*% beta1  # Update mean for next iteration
}

# Combine all shifts into a single dataframe
mean_shifts_df = do.call(rbind, Map(cbind, mean_shifts, shift_group = as.factor(1:3)))

# Plot using ggplot
ggplot() +
  geom_path(data = G_ellipse3, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_path(data = mean_shifts_df, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("salmon", "skyblue", "chartreuse4")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Genetic Covariance Ellipse with Mean Shifts",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed()  # Keep aspect ratio fixed

Alternatively, we can define a phenotypic optimum and a gradient that linearly increase with phenotypic distance to the optimum

•   Black ellipse → Original G-matrix covariance structure.
•   Colored ellipses → Successive shifts in the mean phenotype.
#phen_optimum <- c(0,4)
#new_mean_pop1 <- c(0,0)
#plot(ellipse_coord(Gmat1),xlim=c(-.5,5),ylim=c(-.5,5),type="l")
#points(phen_optimum[1],phen_optimum[2],pch=16,col="red")
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))



library(ggplot2)

# Define the phenotypic optimum and starting mean position
phen_optimum = c(0, 4)
new_mean_pop1 = c(0, 0)

# Compute the G-matrix ellipse
G_ellipse = as.data.frame(ellipse_coord(Gmat1))
colnames(G_ellipse) = c("x", "y")

# Store mean shift ellipses
mean_shifts = list()

# Compute the first shift
beta_opt = phen_optimum - new_mean_pop1
new_mean_pop1 = new_mean_pop1 + Gmat1 %*% beta_opt

# Compute and store 3 iterative mean shifts
for (i in 1:3) {
  shifted_ellipse = as.data.frame(t(matrix(rep(new_mean_pop1, 1000), nrow=2)) + ellipse_coord(Gmat1))
  colnames(shifted_ellipse) = c("x", "y")
  mean_shifts[[i]] = shifted_ellipse
  
  # Update new mean
  beta_opt <- phen_optimum - new_mean_pop1
  new_mean_pop1 <- new_mean_pop1 + Gmat1 %*% beta_opt
}

# Combine shifts into a dataframe
mean_shifts_df = do.call(rbind, Map(cbind, mean_shifts, shift_group = as.factor(1:3)))

# Convert phenotypic optimum into a dataframe for plotting
optimum_df = data.frame(x = phen_optimum[1], y = phen_optimum[2])

# Plot using ggplot2
ggplot() +
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_point(data = optimum_df, aes(x = x, y = y), color = "salmon", size = 4) +  # Phenotypic optimum
  geom_path(data = mean_shifts_df, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("skyblue", "chartreuse4", "darkgoldenrod1")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Evolutionary Mean Shifts Towards Phenotypic Optimum",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed(xlim = c(-0.5, 5), ylim = c(-0.5, 5))  # Set axis limits

Selection on trait 1 This plots a genetic covariance ellipse (G-matrix) and tracks mean shifts toward a defined phenotypic optimum. • The population mean starts at (0,0) and iteratively moves toward an optimum trait value at (0,4). • Each shift is calculated based on the difference between the current mean and the target optimum. • The shifts represent an adaptive trajectory where selection pushes the trait mean toward the optimum.

•   Black ellipse → Original G-matrix covariance structure.
•   Red point → Phenotypic optimum (target trait value).
•   Colored ellipses → Successive mean phenotype shifts toward the optimum.
#phen_optimum <- c(4,0)
#new_mean_pop1 <- c(0,0)
#plot(ellipse_coord(Gmat1),xlim=c(-.5,5),ylim=c(-.5,5),type="l")
#points(phen_optimum[1],phen_optimum[2],pch=16,col="red")
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#beta_opt <- phen_optimum-new_mean_pop1
#new_mean_pop1 <- new_mean_pop1 + Gmat1%*%beta_opt
#lines(t(matrix(rep(new_mean_pop1,1000),nrow=2))+ellipse_coord(Gmat1))
#... can be continued
library(ggplot2)

# Define the phenotypic optimum and initial mean position
phen_optimum = c(4, 0)
new_mean_pop1 = c(0, 0)

# Compute the G-matrix ellipse
G_ellipse = as.data.frame(ellipse_coord(Gmat1))
colnames(G_ellipse) = c("x", "y")

# Store mean shift ellipses
mean_shifts = list()

# Compute the first shift
beta_opt = phen_optimum - new_mean_pop1
new_mean_pop1 = new_mean_pop1 + Gmat1 %*% beta_opt

# Compute and store 3 iterative mean shifts
for (i in 1:3) {
  shifted_ellipse <- as.data.frame(t(matrix(rep(new_mean_pop1, 1000), nrow=2)) + ellipse_coord(Gmat1))
  colnames(shifted_ellipse) <- c("x", "y")
  mean_shifts[[i]] <- shifted_ellipse
  
  # Update new mean
  beta_opt <- phen_optimum - new_mean_pop1
  new_mean_pop1 <- new_mean_pop1 + Gmat1 %*% beta_opt
}

# Combine shifts into a dataframe
mean_shifts_df <- do.call(rbind, Map(cbind, mean_shifts, shift_group = as.factor(1:3)))

# Convert phenotypic optimum into a dataframe for plotting
optimum_df <- data.frame(x = phen_optimum[1], y = phen_optimum[2])

# Plot using ggplot2
ggplot() +
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_point(data = optimum_df, aes(x = x, y = y), color = "salmon", size = 4) +  # Phenotypic optimum
  geom_path(data = mean_shifts_df, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("skyblue", "chartreuse4", "darkgoldenrod1")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Evolutionary Mean Shifts Towards Phenotypic Optimum (Trait 1 Selection)",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed(xlim = c(-0.5, 5), ylim = c(-0.5, 5))  # Set axis limits

For pop2 and any pop similarly

#phen_optimum <- c(0,4)
#new_mean_pop2 <- c(0,0)
#plot(ellipse_coord(Gmat2),xlim=c(-.5,5),ylim=c(-.5,5),type="l")
#points(phen_optimum[1],phen_optimum[2],pch=16,col="red")
#beta_opt <- phen_optimum-new_mean_pop2
#new_mean_pop2 <- new_mean_pop2 + Gmat2%*%beta_opt
#lines(t(matrix(rep(new_mean_pop2,1000),nrow=2))+ellipse_coord(Gmat2))
#beta_opt <- phen_optimum-new_mean_pop2
#new_mean_pop2 <- new_mean_pop2 + Gmat2%*%beta_opt
#lines(t(matrix(rep(new_mean_pop2,1000),nrow=2))+ellipse_coord(Gmat2))

library(ggplot2)

# Function to compute mean shifts and return ellipse data
compute_mean_shifts <- function(G_matrix, phen_optimum, steps = 3) {
  mean_shifts <- list()
  new_mean <- c(0, 0)  # Initial population mean
  
  for (i in 1:steps) {
    beta_opt <- phen_optimum - new_mean
    new_mean <- new_mean + G_matrix %*% beta_opt
    shifted_ellipse <- as.data.frame(t(matrix(rep(new_mean, 1000), nrow=2)) + ellipse_coord(G_matrix))
    colnames(shifted_ellipse) <- c("x", "y")
    mean_shifts[[i]] <- cbind(shifted_ellipse, shift_group = as.factor(i))  # Store shift step
  }
  
  return(do.call(rbind, mean_shifts))
}

# Define phenotypic optimum
phen_optimum <- c(0, 4)

# Compute ellipses for each population
G_ellipse2 <- as.data.frame(ellipse_coord(Gmat2))
colnames(G_ellipse2) <- c("x", "y")
mean_shifts2 <- compute_mean_shifts(Gmat2, phen_optimum)

G_ellipse3 <- as.data.frame(ellipse_coord(Gmat3))
colnames(G_ellipse3) <- c("x", "y")
mean_shifts3 <- compute_mean_shifts(Gmat3, phen_optimum)

# Convert phenotypic optimum to dataframe
optimum_df <- data.frame(x = phen_optimum[1], y = phen_optimum[2])

# Plot for Population 2
p2 = ggplot() +
  geom_path(data = G_ellipse2, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_point(data = optimum_df, aes(x = x, y = y), color = "salmon", size = 4) +  # Phenotypic optimum
  geom_path(data = mean_shifts2, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("skyblue", "chartreuse3", "darkgoldenrod1")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Population 2: Evolutionary Mean Shifts",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed(xlim = c(-0.5, 5), ylim = c(-0.5, 5))  # Set axis limits

# Plot for Population 3
p3 = ggplot() +
  geom_path(data = G_ellipse3, aes(x = x, y = y), color = "black", size = 1) +  # G-matrix ellipse
  geom_point(data = optimum_df, aes(x = x, y = y), color = "salmon", size = 4) +  # Phenotypic optimum
  geom_path(data = mean_shifts3, aes(x = x, y = y, group = shift_group, color = shift_group), size = 1) +  # Shifted ellipses
  scale_color_manual(values = c("skyblue", "chartreuse3", "darkgoldenrod1")) +  # Different colors for each shift
  theme_minimal() +
  labs(title = "Population 3: Evolutionary Mean Shifts",
       x = "Trait 1",
       y = "Trait 2",
       color = "Shift Step") +
  coord_fixed(xlim = c(-2, 5), ylim = c(-2, 5))  # Set axis limits

p2

p3

Backsolving the breeding values to estimate SNP effect
We can use the genotype matrix to estimate SNP effects from breeding values Caution: GBLUP are shrunk (meaning extreme values are underestimated?) because of ridge regression (REML) or uninformative priors, so SNP estimates are not accurate using this method

load('RData/Backtransformation_matrices.RData')
## We need to run model1.3 again and keep the posterior distribution of the
## random effects by adding pr=TRUE
#nb_trait=2
#phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height","weight"))))
#prior1.3<- list(G=list(G1=list(V=(phen.var/2),nu=2)),R=list(V=(phen.var/2),nu=2))
#model1.3_pop2_with_rand.post<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,ginv=list(ind = kin.inv),data=phen,prior=prior1.3,
#                   family = rep("gaussian", nb_trait),nitt= 53000, thin= 500, burnin= 3000,pr=TRUE)
#save('model1.3_pop2_with_rand.post',file='RData/model1.3_pop2_with_rand_post.RData')
load('RData/model1.3_pop2_with_rand_post.RData')
col_subset = colnames(model1.3_pop2_with_rand.post$Sol)[grepl(paste0("weight.ind."), colnames(model1.3_pop2_with_rand.post$Sol))]
breeding_values = data.frame(model1.3_pop2_with_rand.post$Sol[, col_subset])
new_colnames = gsub(".*ind\\.", "", colnames(breeding_values))#
colnames(breeding_values) = new_colnames
# Convert breeding values to matrix
breeding_values_matrice = as.matrix(breeding_values)

## data frame of chr position
# I'm just making up a random SNP matrix here

genotypes = read.csv("data/Simulated_Genotype_Data.csv", row.names=1)
#only need 200 SNPs
genotypes = genotypes[,1:200]
snps_positions =  data.frame(chr='I',pos=1:ncol(genotypes))

# Calculate SNP effects
SNPs_effects.weight = M_transpose %*% MM_prime_inverse %*% t(breeding_values_matrice)

## And for height
col_subset <- colnames(model1.3_pop2_with_rand.post$Sol)[grepl(paste0("height.ind."), colnames(model1.3_pop2_with_rand.post$Sol))]
breeding_values <- data.frame(model1.3_pop2_with_rand.post$Sol[, col_subset])
new_colnames <- gsub(".*ind\\.", "", colnames(breeding_values))#
colnames(breeding_values) <- new_colnames
# Convert breeding values to matrix
breeding_values_matrice <- as.matrix(breeding_values)
SNPs_effects.height <- M_transpose %*% MM_prime_inverse %*% t(breeding_values_matrice)

snps_positions$weight <- apply(SNPs_effects.weight,1,median)
snps_positions$height <- apply(SNPs_effects.height,1,median)

### Load the SNP effect as generated
plot(snps_positions$height)

true_geno_effects=read.table("data/Pop2_geno_effects.txt",h=TRUE)
plot(snps_positions$weight,true_geno_effects$height)

summary(lm(snps_positions$height~true_geno_effects$weight))
## 
## Call:
## lm(formula = snps_positions$height ~ true_geno_effects$weight)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.090753 -0.020568  0.002637  0.026268  0.082732 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               6.645e-05  2.593e-03   0.026     0.98    
## true_geno_effects$weight -1.309e-01  2.715e-02  -4.820 2.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03631 on 198 degrees of freedom
## Multiple R-squared:  0.105,  Adjusted R-squared:  0.1005 
## F-statistic: 23.23 on 1 and 198 DF,  p-value: 2.856e-06
summary(lm(snps_positions$weight~true_geno_effects$height))
## 
## Call:
## lm(formula = snps_positions$weight ~ true_geno_effects$height)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.095351 -0.027590  0.003952  0.029384  0.098740 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -0.001077   0.003044  -0.354    0.724    
## true_geno_effects$height -0.183195   0.028973  -6.323 1.67e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04294 on 198 degrees of freedom
## Multiple R-squared:  0.168,  Adjusted R-squared:  0.1638 
## F-statistic: 39.98 on 1 and 198 DF,  p-value: 1.67e-09

End of part 1

Beginning of part 2

We will here split the G/E matrices using within/between line variance instead of using a pedigree or a kinship matrix (see previous workshops)

Load the first data set

phen = read.table(file="data/Pop1_phenotypes_second_WS.txt",sep='\t',h=TRUE)
head(phen)
##   ind     height      weight
## 1   1 -2.2445172 -0.66770736
## 2   2 -0.4380629  0.02265420
## 3   3  1.1791095 -0.12778455
## 4   4 -1.3845064  0.04073303
## 5   5 -0.7754505 -0.36484391
## 6   6 -0.8109581 -0.27744043

NB: some redundancy with the first workshop… but some students did not attend them Re-introduce rapidly the animal model / Introduce the Price equation

Run first an animal model with the two phenotypic traits Run a final model - multivariate

nb_trait=2
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height","weight"))))
prior1.3 = list(G=list(G1=list(V=(phen.var/2),nu=2)),R=list(V=(phen.var/2),nu=2))
#model1.3<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen,prior=prior1.3,
#                   family = rep("gaussian", nb_trait),nitt= 53000, thin= 500, burnin= 3000)
#save('model1.3',file='RData/model1.3_WS2.RData')
load('RData/model1.3_WS2.RData')
Gmat1 = matrix(apply(model1.3$VCV[,1:4],2,median),nrow=2)

Same thing for the population 2 that we will use later

phen2 = read.table(file="data/Pop2_phenotypes_second_WS.txt",sep='\t',h=TRUE)
head(phen2)
##   ind     height      weight
## 1   1 -1.1285808 -0.08775198
## 2   2  0.4538317 -0.18213257
## 3   3 -0.1416177 -2.62585728
## 4   4 -0.1575402  1.69044269
## 5   5 -0.5649287 -0.37458336
## 6   6 -1.2021183 -0.55627078
nb_trait=2
phen.var = diag(nb_trait) * diag(var(subset(phen2, select = c("height","weight"))))
prior1.3 = list(G=list(G1=list(V=(phen.var/2),nu=2)),R=list(V=(phen.var/2),nu=2))
#model1.3.pop2<-MCMCglmm(cbind(height,weight)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen2,prior=prior1.3,
#                   family = rep("gaussian", nb_trait),nitt= 53000, thin= 500, burnin= 3000)
#save('model1.3.pop2',file='RData/model1.3_pop2_WS2.RData')
load('RData/model1.3_pop2_WS2.RData')

Gmat2 = matrix(apply(model1.3.pop2$VCV[,1:4],2,median),nrow=2)

Generate fitness estimates for the individuals - add some noise Initial scenario where only one trait is under selection

w = 4-phen$height
phen$w=w/mean(w)

## STNS  - estimate a G matrix with fitness - to predict phenotypic evolution
nb_trait=3
phen.var = diag(nb_trait) * diag(var(subset(phen, select = c("height","weight","w"))))
prior1.4 = list(G=list(G1=list(V=(phen.var/3),nu=3)),R=list(V=(phen.var/3),nu=3))
#model1.4<-MCMCglmm(cbind(height,weight,w)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen,prior=prior1.4,
#                   family = rep("gaussian", nb_trait),nitt= 53000, thin= 500, burnin= 3000)
#save('model1.4',file='RData/model1.4.RData')
load('RData/model1.4.RData')
Gzw = matrix(apply(model1.4$VCV[,1:9],2,median),nrow=3)
print(Gzw)
##              [,1]        [,2]         [,3]
## [1,]  0.034491080  0.04453024 -0.007685951
## [2,]  0.044530238  0.54472688 -0.011167551
## [3,] -0.007685951 -0.01116755  0.002155797
###############################################################################
### Here we need to build a random estimates: are the estimates larger than  ##
### expected by chance ?                                                     ##
###############################################################################
phen.rand = phen
#post.Gzw <- NULL
#prior1.4<- list(G=list(G1=list(V=(phen.var/3),nu=3)),R=list(V=(phen.var/3),nu=3))
#for(i in 1:100){
#  print(i)
#phen.rand$ind=sample(phen.rand$ind)
#model1.4.rand<-MCMCglmm(cbind(height,weight,w)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen.rand,prior=prior1.4,
#                   family = rep("gaussian", nb_trait),verbose=FALSE)
#post.Gzw <- rbind(post.Gzw,apply(model1.4.rand$VCV,2,median))
#}
#save('post.Gzw',file='RData/post.Gzw.Rand_model4.RData')
load("RData/post.Gzw.Rand_model4.RData")
HPDinterval(as.mcmc(post.Gzw)[,c(3,6,9)])
##                               lower        upper
## traitw:traitheight.ind -0.008342513 -0.005449856
## traitw:traitweight.ind -0.004603998 -0.002157472
## traitw:traitw.ind       0.001572915  0.002295653
## attr(,"Probability")
## [1] 0.95
#lower        upper
#traitw:traitheight.ind -0.008342513 -0.005449856
#traitw:traitweight.ind -0.004603998 -0.002157472
#traitw:traitw.ind       0.001572915  0.002295653

apply(model1.4$VCV[,c(3,6,9)],2,median)
## traitw:traitheight.ind traitw:traitweight.ind      traitw:traitw.ind 
##           -0.007685951           -0.011167551            0.002155797
#traitw:traitheight.ind traitw:traitweight.ind      traitw:traitw.ind 
#-0.007685951           -0.011167551            0.002155797 

Recapitulate the selection gradient Now that we have the G-matrix and the expected phenotypic change over 1 generation, we can deduce the gradient

beta_coefs = solve(Gmat1)%*%Gzw[1:2,3]
print(beta_coefs)
##              [,1]
## [1,] -0.165201736
## [2,] -0.005679371
plot(phen$weight,phen$w)

summary(lm(height~w,data=phen))
## 
## Call:
## lm(formula = height ~ w, data = phen)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.080e-16 -2.130e-16 -5.800e-17  1.000e-16  1.777e-13 
## 
## Coefficients:
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  4.000e+00  2.462e-16  1.624e+16   <2e-16 ***
## w           -4.000e+00  2.389e-16 -1.674e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.271e-15 on 2998 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 2.803e+32 on 1 and 2998 DF,  p-value: < 2.2e-16
summary(lm(weight~w,data=phen))
## 
## Call:
## lm(formula = weight ~ w, data = phen)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.58349 -0.54852 -0.00311  0.53547  2.52359 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.57348    0.05765   44.64   <2e-16 ***
## w           -2.57348    0.05593  -46.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7657 on 2998 degrees of freedom
## Multiple R-squared:  0.4139, Adjusted R-squared:  0.4137 
## F-statistic:  2117 on 1 and 2998 DF,  p-value: < 2.2e-16

And so we can build a credible interval for the beta coefficients

beta_distribution = NULL
for(i in 1:nrow(model1.4$VCV)){
  beta_distribution = cbind(beta_distribution,solve(matrix(model1.4$VCV[i,1:9],nrow=3)[1:2,1:2])%*%Gzw[1:2,3])
}

HPDinterval(as.mcmc(t(beta_distribution)))
##            lower        upper
## var1 -0.33413444 -0.144381136
## var2 -0.01630024  0.009251241
## attr(,"Probability")
## [1] 0.95

Generate different evolutionary scenario -> generate the Gzw matrices and see how it matches the expectations ?

We have created different fitness distribution for the population 2, you need to analyze them, predict the phenotypic evolution and deduce the selection on the different phenotypes

phen2$w=read.table("data/fitness_pop2.txt",h=TRUE)$x

nb_trait=3
phen.var = diag(nb_trait) * diag(var(subset(phen2, select = c("height","weight","w"))))
#prior1.4_pop2 <- list(G=list(G1=list(V=(phen.var/3),nu=3)),R=list(V=(phen.var/3),nu=3))
#model1.4_pop2<-MCMCglmm(cbind(height,weight,w)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen,prior=prior1.4_pop2,
#                   family = rep("gaussian", nb_trait))
#save('model1.4_pop2',file='RData/model1.4_pop2.RData')
load('RData/model1.4_pop2.RData')
Gzw_pop2 = matrix(apply(model1.4_pop2$VCV[,1:9],2,median),nrow=3)
#phen2.rand = phen2
#post.Gzw_pop2 <- NULL
#prior1.4<- list(G=list(G1=list(V=(phen.var/3),nu=3)),R=list(V=(phen.var/3),nu=3))
#for(i in 1:100){
#  print(i)
#phen2.rand$ind=sample(phen2.rand$ind)
#model1.4.rand2<-MCMCglmm(cbind(height,weight,w)~trait-1,random=~us(trait):ind,
#                   rcov=~us(trait):units,data=phen2.rand,prior=prior1.4,
#                   family = rep("gaussian", nb_trait),verbose=FALSE)
#post.Gzw_pop2 <- rbind(post.Gzw_pop2,apply(model1.4.rand2$VCV,2,median))
#}
#save('post.Gzw_pop2',file='RData/post.Gzw_pop2.Rand_model4.RData')
load('RData/post.Gzw_pop2.Rand_model4.RData')

Now the real thing - correlated selection

When populations are at the equilibrium, we can still estimate the gamma matrix i.e. the curvature of the fitness landscape.

1- Load relative fitness data and plot the data

phen2$w = read.table("data/fitness_pop2_quadratic.txt")$V1
phen2$w_log <- log(phen2$w)
color_gradient <- colorRampPalette(c("skyblue","salmon"))
color_index <- c(1+as.numeric((round(10*phen2$w))))
summary(color_index)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   8.000  10.000   8.926  11.000  15.000
colors <- color_gradient(15)
plot(phen2[,2],phen2[,3],col=adjustcolor(colors[color_index],alpha=.2),pch=16,cex=.6)

2- Fit quadratic regression model

library(lme4)
## Warning: package 'lme4' was built under R version 4.3.3
model <- lmer(w_log ~ height + weight + I(height^2/2) + I(weight^2/2) + I(height * weight) +(1|ind),data=phen2)
## boundary (singular) fit: see help('isSingular')
model <- lmer(w_log ~ -1 + I(height^2/2) + I(weight^2/2) + I(height * weight) +(1|ind),data=phen2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: w_log ~ -1 + I(height^2/2) + I(weight^2/2) + I(height * weight) +  
##     (1 | ind)
##    Data: phen2
## 
## REML criterion at convergence: -194292.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.7523 -0.6585 -0.1936  0.0756  3.0421 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  ind      (Intercept) 3.778e-32 1.944e-16
##  Residual             4.029e-30 2.007e-15
## Number of obs: 3000, groups:  ind, 300
## 
## Fixed effects:
##                      Estimate Std. Error    t value
## I(height^2/2)      -1.000e-01  5.287e-17 -1.892e+15
## I(weight^2/2)      -4.000e-01  2.611e-17 -1.532e+16
## I(height * weight)  2.500e-01  3.209e-17  7.791e+15
## 
## Correlation of Fixed Effects:
##             I(h^2/2) I(w^2/2)
## I(wght^2/2) -0.097           
## I(hgh*wght) -0.478   -0.462  
## optimizer (nloptwrap) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues

3 - Extract gamma coefficients

gamma_11 <- coef(model)[[1]][1,"I(height^2/2)"]
gamma_22 <- coef(model)[[1]][1,"I(weight^2/2)"]
gamma_12 <- coef(model)[[1]][1,"I(height * weight)"]

4: Construct Gamma matrix

gamma_matrix <- matrix(c(gamma_11, gamma_12, gamma_12, gamma_22), nrow = 2, byrow = TRUE)
# Print Gamma matrix
print(gamma_matrix)
##       [,1]  [,2]
## [1,] -0.10  0.25
## [2,]  0.25 -0.40

Gamma matrix and its interpretation To interpret the gamma matrix, we use a rotation of the phenotypic space in order to interpret traits with independent direction of selection

eigen(gamma_matrix)
## eigen() decomposition
## $values
## [1]  0.04154759 -0.54154759
## 
## $vectors
##            [,1]       [,2]
## [1,] -0.8701999 -0.4926988
## [2,] -0.4926988  0.8701999
EV_gamma = eigen(gamma_matrix)$vectors

Eigenvectors describe the rotation of the phenotypic space They are the coefficient of the linear transformation needed to get eigentraits from our phenotypic traits

#plot(phen2[,2],phen2[,3],col=adjustcolor(colors[1+as.numeric((round(10*fitness)))],alpha=.2),pch=16,cex=.6,asp=1)
#abline(a=0,b=EV_gamma[2,1]/EV_gamma[1,1])
#abline(a=0,b=EV_gamma[2,2]/EV_gamma[1,2])

library(ggplot2)
library(scales)

# Convert fitness values to color scale
phen2$color = scales::alpha(colors[1 + as.numeric(round(10 * phen2$w))], alpha = 0.2)

# Create ggplot scatter plot
ggplot(phen2, aes(x = height, y = weight, color = color)) +
  geom_point(size = 0.6, alpha = 0.2) +  # Scatter plot with transparency
  geom_abline(slope = EV_gamma[2,1] / EV_gamma[1,1], intercept = 0, color = "black") +  # First abline
  geom_abline(slope = EV_gamma[2,2] / EV_gamma[1,2], intercept = 0, color = "black") +  # Second abline
  theme_minimal() +
  labs(title = "Phenotypic Trait Distribution with Fitness Colors",
       x = "Trait 1",
       y = "Trait 2") + scale_color_brewer(palette = "RdYlBu") + # Color scale
  coord_fixed()  # Keep aspect ratio fixed
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette RdYlBu is 11
## Returning the palette you asked for with that many colors
## Warning: Removed 131 rows containing missing values or values outside the scale range
## (`geom_point()`).

Eigen values are indicative of different selection (curvature of the fitness landscape) What would mean null / positive / negative eigenvalues ?

NB: How to test for non-zero gamma eigenvalues ?

phen2$ET1 = phen2$height*EV_gamma[1,1]+phen2$weight*EV_gamma[2,1]
phen2$ET2 = phen2$height*EV_gamma[1,2]+phen2$weight*EV_gamma[2,2]
plot(phen2$ET1,phen2$ET2,col=adjustcolor(colors[1+as.numeric((round(10*w)))],alpha=.2),pch=16,cex=.6,asp=1)

model_rotated <- lmer(w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) + I(ET1 * ET2) +(1|ind),data=phen2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 15.3082 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
model_rotated_nocov <- lmer(w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) +(1|ind),data=phen2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 4.44142 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
anova(model_rotated,model_rotated_nocov)
## refitting model(s) with ML (instead of REML)
## Data: phen2
## Models:
## model_rotated_nocov: w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) + (1 | ind)
## model_rotated: w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) + I(ET1 * ET2) + (1 | ind)
##                     npar     AIC     BIC logLik deviance Chisq Df Pr(>Chisq)
## model_rotated_nocov    4 -196704 -196680  98356  -196712                    
## model_rotated          5 -196287 -196257  98149  -196297     0  1          1
model_rotated_nocov_noET1 <- lmer(w_log ~ -1  + I(ET2^2/2) +(1|ind),data=phen2)
model_rotated_nocov_noET2 <- lmer(w_log ~ -1  + I(ET1^2/2) +(1|ind),data=phen2)  

anova(model_rotated_nocov_noET1,model_rotated_nocov)
## refitting model(s) with ML (instead of REML)
## Data: phen2
## Models:
## model_rotated_nocov_noET1: w_log ~ -1 + I(ET2^2/2) + (1 | ind)
## model_rotated_nocov: w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) + (1 | ind)
##                           npar     AIC     BIC logLik deviance  Chisq Df
## model_rotated_nocov_noET1    3   -9454   -9436   4730    -9460          
## model_rotated_nocov          4 -196704 -196680  98356  -196712 187252  1
##                           Pr(>Chisq)    
## model_rotated_nocov_noET1               
## model_rotated_nocov        < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_rotated_nocov_noET2,model_rotated_nocov)
## refitting model(s) with ML (instead of REML)
## Data: phen2
## Models:
## model_rotated_nocov_noET2: w_log ~ -1 + I(ET1^2/2) + (1 | ind)
## model_rotated_nocov: w_log ~ -1 + I(ET1^2/2) + I(ET2^2/2) + (1 | ind)
##                           npar     AIC     BIC logLik deviance  Chisq Df
## model_rotated_nocov_noET2    3    2721    2739  -1357     2715          
## model_rotated_nocov          4 -196704 -196680  98356  -196712 199427  1
##                           Pr(>Chisq)    
## model_rotated_nocov_noET2               
## model_rotated_nocov        < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3- Rotation of the phenotypic space and the alignement between matrices

We can also rotate the phenotypic space to a obtain diagonal G-matrices

eigen(Gmat1)
## eigen() decomposition
## $values
## [1] 0.55298842 0.04017184
## 
## $vectors
##            [,1]        [,2]
## [1,] 0.09549854 -0.99542957
## [2,] 0.99542957  0.09549854
eigen(Gmat2)
## eigen() decomposition
## $values
## [1] 1.0068446 0.4000877
## 
## $vectors
##            [,1]       [,2]
## [1,] -0.2395288 -0.9708893
## [2,]  0.9708893 -0.2395288

We obtain eigenvalues - the amount of genetic variance along each eigen trait, and eigenvectors that describe the rotation of the phenotypic space

e1<-eigen(Gmat1)$vectors
t(e1)%*%Gmat1%*%(e1)
##              [,1]         [,2]
## [1,] 5.529884e-01 4.772245e-18
## [2,] 3.604529e-18 4.017184e-02

The resulting matrix is a diagonal matrix with eigenvalues as the diagonal elements

e2<-eigen(Gmat2)$vectors
t(e2)%*%Gmat1%*%(e2)
##            [,1]        [,2]
## [1,]  0.4967519 -0.16023876
## [2,] -0.1602388  0.09640833

Finally, we can ask how are the gmax (the first axis of additive genetic variation) and the gamma axis aligned. To do that we usually compute angles. Here is a function to compute angles between vectors

angle_theta <- function(x, y) {
  dot.prod <- x %*% y
  norm.x <- norm(x, type = "2")
  norm.y <- norm(y, type = "2")
  theta <- 180/pi * as.numeric(acos(dot.prod/(norm.x * norm.y)))
  as.numeric(theta)
}

Gmat2 eigenvectors and gamma eigenvectors

angle_theta(e2[,1],EV_gamma[,1]) # 105°
## [1] 105.6594

Both are not oriented, if e1 is an eigenvector of Gmat2 then -e1 also

min(angle_theta(e2[,1],EV_gamma[,1]),180-angle_theta(e2[,1],EV_gamma[,1])) # 74.3°
## [1] 74.34061
min(angle_theta(e2[,1],EV_gamma[,1]),angle_theta(e2[,1],-EV_gamma[,1]))
## [1] 74.34061

Better aligned with EV2 ?

min(angle_theta(e2[,1],EV_gamma[,2]),angle_theta(e2[,2],-EV_gamma[,2])) # 15°
## [1] 15.65939

And the P matrix ?

min(angle_theta(eigen(cor(phen[,2:3]))$vectors[,1],EV_gamma[,1]),angle_theta(eigen(cor(phen[,2:3]))$vectors[,1],-EV_gamma[,1])) # 15°
## [1] 15.48188

Load the function to plot ellipses from G matrices

source("Rcode/99_Ellipse_function.R")
#plot(phen2[,2],phen2[,3],col=adjustcolor(colors[color_index],alpha=.2),pch=16,cex=.6)
#lines(ellipse_coord(cor(phen[,2:3])),col='darkgreen',lwd=2)
#lines(ellipse_coord(Gmat2),col='orange',lwd=2)
# Convert color indexing to a valid color format with transparency
phen2$plot_color <- scales::alpha(colors[color_index], alpha = 0.2)

# Compute ellipse coordinates
P_ellipse <- as.data.frame(ellipse_coord(cor(phen[, 2:3])))  # P-matrix ellipse
G_ellipse <- as.data.frame(ellipse_coord(Gmat2))  # G-matrix ellipse

colnames(P_ellipse) <- c("x", "y")
colnames(G_ellipse) <- c("x", "y")

# Create ggplot
ggplot() +
  geom_point(data = phen2, aes(x = height, y = weight, color = plot_color), size = 0.6, alpha = 0.2) +  # Scatter plot
  geom_path(data = P_ellipse, aes(x = x, y = y), color = "cyan3", size = 0.6) +  # P-matrix ellipse
  geom_path(data = G_ellipse, aes(x = x, y = y), color = "salmon", size = 0.6) +  # G-matrix ellipse
  theme_minimal() +
  labs(title = "Phenotypic and Genetic Covariance Ellipses",
       x = "Trait 1",
       y = "Trait 2",
       color = "Color Index") + scale_color_brewer(palette = "RdYlBu") +  # Color scale
  coord_fixed() + # Maintain aspect ratio 
  guides(color = "none")  # Remove the color legend
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette RdYlBu is 11
## Returning the palette you asked for with that many colors
## Warning: Removed 131 rows containing missing values or values outside the scale range
## (`geom_point()`).